###---###---###---###---###---###---###---###---###---####
# Kerice Doten-Snitker
# 
# Exploratory analysis prep and data cleaning
# Prepared for:
# Doten-Snitker, K. 2021. "Contexts of State Violence: Jewish Expulsions in the Holy Roman Empire." Social Science History 45(1):131-163. https://doi.org/10.1017/ssh.2020.39

# Built under R 3.4.3 
# Platform: x86_64-apple-darwin15.6.0 (64-bit)
###---###---###---###---###---###---###---###---###---####

# set locale to preempt UTF-8 issues with the data
#Sys.getlocale()
#Sys.setlocale("LC_CTYPE", "en_US.UTF-8")

#for windows
#Sys.setlocale(category = "LC_ALL", locale = "English_United States.1252")

# add packages with the pacman library, which installs if not installed
library(pacman)
# use the here package for improving replicability
p_load(here)
# for data manipulation
p_load(magrittr, plyr, tidyverse, reshape2)
# do I still need foreign?


###---###---###---###---###---###---###---###---###---####
### Data include only settlement-periods with a certain Jewish presence,
### that is, Juden!=88,
### AND exclude nichtstaedtische (non-urban) settlements.
### My own data entry replaces what I had used from VV, CY, KP.
###---###---###---###---###---###---###---###---###---####

# Load the settlement-period catalogue from in_data/ (path built with
# here::here()). Strings are kept as character vectors rather than factors.
mydata_b <- read.csv(
  file = here::here("in_data", "Haverkamp_Ortskatalog_limited_notext.csv"),
  header = TRUE,
  stringsAsFactors = FALSE,
  encoding = "latin1"
)

# recode NAs to 0's in binary/ordinal/count columns?
# (R adds NA automatically, but only in non-string columns)
# did this manually in Excel for the relevant columns
# (some NAs should stay NA!)
# mydata_b[is.na(mydata_b)] <- 0

# fix errors, if any

###---###---###---###---###---###---###---###---###---###
### other essential recoding ####
###---###---###---###---###---###---###---###---###---###

# Recode missing values to 0 in the columns listed below, on the assumption
# that absence of evidence (an empty cell) means the feature was absent.
# Written with base R instead of dplyr::mutate_at()/funs(): funs() has been
# deprecated since dplyr 0.8.0. The per-column recode itself is unchanged
# (ifelse(is.na(x), 0, x)), and a missing column still raises an error.
mydata_b <- local({
  zero_fill_cols <- c(
    "Gmnde", "Ldrs", "Cmtry", "Syngg", "Mikva", "JQrtr", "OthJw",
    "VfTotal", "VfTotalPrev", "RmTot", "RmTotPrev", "HfTot", "HfTotPrev",
    "GeTot", "GeTotPrev", "VrtrbObs", "VrtTotPrev", "Bishop", "Reich", "King",
    "Prince", "Lesser", "Stadt", "Monstry", "AuthChgRts", "AuthChgSld",
    "AuthChgMtg", "AuthChgFf", "AuthChgCqr", "AuthChgFam", "AuthChgOth",
    "Residence", "Castle", "Fortified", "Seal", "Hospital", "LeperHosp",
    "University", "Transit", "Tolls", "Trade", "Mint", "Guilds", "Wine",
    "Cloth", "Leather", "Grain", "Agric", "Lumber", "Mine", "Salt", "Mkt",
    "Frgn", "StRt", "AuthT", "Vogt", "Amt", "Castellan", "Mayor", "Rat",
    "Schoeffen", "Schlthss", "HighJust", "Elections", "Diocese", "RelCm",
    "MilOrd", "Bund", "Hanse", "RegBund"
  )
  mydata_b[zero_fill_cols] <- lapply(
    mydata_b[zero_fill_cols],
    function(x) ifelse(is.na(x), 0, x)
  )
  mydata_b
})

# Recode Period to a consecutive index so results are easier to interpret:
# 1100 -> 1, 1200 -> 2, 1250 -> 3, 1300 -> 4, 1350 -> 5, 1400 -> 6,
# 1450 -> 7, 1500 -> 8, 1520 -> 9. Any other value (or NA) becomes NA.
mydata_b$Period2 <- as.numeric(
  match(
    mydata_b$Period,
    c(1100, 1200, 1250, 1300, 1350, 1400, 1450, 1500, 1520)
  )
)

# Period dummies: one 0/1 column per period value, named Period<value>
# (Period1100 ... Period1520), appended in chronological order.
# %in% is used so that a missing Period yields 0 rather than NA.
mydata_b <- local({
  cutoffs <- c(1100, 1200, 1250, 1300, 1350, 1400, 1450, 1500, 1520)
  mydata_b[paste0("Period", cutoffs)] <- lapply(
    cutoffs,
    function(cutoff) as.numeric(mydata_b$Period %in% cutoff)
  )
  mydata_b
})

# Disjuncture indicator: 0 for periods before 1450, 1 for 1450 and later
# (NA where Period is missing).
mydata_b$Period_dis <- as.numeric(mydata_b$Period >= 1450)

# Mkt2: copy of Mkt with 0 in place of NA (no information)
mydata_b$Mkt2 <- replace(mydata_b$Mkt, is.na(mydata_b$Mkt), 0)

# RelCm2: count of religious communities, with code 77 set to NA
mydata_b$RelCm2 <- replace(
  mydata_b$RelCm,
  which(mydata_b$RelCm == 77),
  NA
)

# AuthT2: count of authorities, with code 77 set to NA
mydata_b$AuthT2 <- replace(
  mydata_b$AuthT,
  which(mydata_b$AuthT == 77),
  NA
)

# Persecution recodes. In the *Tot / VfTotal columns the value 9 is treated as
# an attempted persecution. Three families of derived columns are created, in
# this order: <stem>Bin, <total>2, <total>Att.
mydata_b <- local({
  # Binary persecutions (RmBin, HfBin, GeBin): totals strictly between 0 and 9
  # become 1, a total of 9 becomes NA, every other value is carried over as is.
  for (stem in c("Rm", "Hf", "Ge")) {
    totals <- mydata_b[[paste0(stem, "Tot")]]
    occurred <- replace(totals, which(totals > 0 & totals < 9), 1)
    mydata_b[[paste0(stem, "Bin")]] <- replace(
      occurred,
      which(totals == 9),
      NA
    )
  }

  # Totals omitting attempted persecutions (RmTot2, HfTot2, GeTot2, VfTotal2):
  # values below 9 are kept, everything else is NA.
  # Author's tally: no such omissions for RmTot or HfTot.
  for (total_name in c("RmTot", "HfTot", "GeTot", "VfTotal")) {
    totals <- mydata_b[[total_name]]
    below_nine <- totals < 9
    completed <- rep(NA, length(totals))
    completed[below_nine] <- totals[below_nine]
    mydata_b[[paste0(total_name, "2")]] <- completed
  }

  # "Persecution attempt" option (RmTotAtt, HfTotAtt, GeTotAtt, VfTotalAtt):
  # 0 by default, and the original value (9) where the total equals 9.
  # Author's tally: none for RmTot and HfTot, 1 for GeTot, 1 for VfTotal.
  for (total_name in c("RmTot", "HfTot", "GeTot", "VfTotal")) {
    totals <- mydata_b[[total_name]]
    is_attempt <- totals == 9
    attempts <- numeric(nrow(mydata_b))
    attempts[is_attempt] <- totals[is_attempt]
    mydata_b[[paste0(total_name, "Att")]] <- attempts
  }

  mydata_b
})

# Expulsion years: copies of VrtrbYr1 / VrtrbYr2 with code 77 set to NA
mydata_b$VrtrbYr1r <- replace(
  mydata_b$VrtrbYr1,
  which(mydata_b$VrtrbYr1 == 77),
  NA
)
mydata_b$VrtrbYr2r <- replace(
  mydata_b$VrtrbYr2,
  which(mydata_b$VrtrbYr2 == 77),
  NA
)

# Identify France-wide expulsion waves in 1306, 1321/1323, 1394.
# A settlement-period is flagged when an expulsion of type 77 falls in one of
# the wave years.
#
# Fix: the 1321/1323 conditions used to read `type == 77 & yr == 1321 | yr == 1323`.
# Because & binds tighter than | in R, that flagged EVERY expulsion dated 1323,
# whatever its type. The year test is now `yr %in% c(1321, 1323)`, so the
# type == 77 requirement applies to both years.

# wave of 1306
# NOTE(review): only the first expulsion (VrtrbType1 / VrtrbYr1) is checked
# here, whereas the 1321/1323 and 1394 waves also check the second one.
# Confirm whether that is intentional.
mydata_b$VrtrbFr1 <- rep(0, nrow(mydata_b))
mydata_b$VrtrbFr1[mydata_b$VrtrbType1 == 77 & mydata_b$VrtrbYr1 == 1306] <- 1

# wave of 1321/1323 (first or second expulsion in the period)
mydata_b$VrtrbFr2 <- rep(0, nrow(mydata_b))
mydata_b$VrtrbFr2[
  mydata_b$VrtrbType1 == 77 & mydata_b$VrtrbYr1 %in% c(1321, 1323)
] <- 1
mydata_b$VrtrbFr2[
  mydata_b$VrtrbType2 == 77 & mydata_b$VrtrbYr2 %in% c(1321, 1323)
] <- 1

# wave of 1394 (first or second expulsion in the period)
mydata_b$VrtrbFr3 <- rep(0, nrow(mydata_b))
mydata_b$VrtrbFr3[mydata_b$VrtrbType1 == 77 & mydata_b$VrtrbYr1 == 1394] <- 1
mydata_b$VrtrbFr3[mydata_b$VrtrbType2 == 77 & mydata_b$VrtrbYr2 == 1394] <- 1

# any of the three waves
mydata_b$VrtrbFr <- rep(0, nrow(mydata_b))
mydata_b$VrtrbFr[
  mydata_b$VrtrbFr1 == 1 | mydata_b$VrtrbFr2 == 1 | mydata_b$VrtrbFr3 == 1
] <- 1

# Frequency checks. The counts noted below were recorded by the author before
# the precedence fix; re-check the VrtrbFr2 and total figures.
ftable(mydata_b$VrtrbFr1) # 18
ftable(mydata_b$VrtrbFr2) # 10
ftable(mydata_b$VrtrbFr3) # 3
ftable(mydata_b$VrtrbFr) # total 25 settlement-periods

# Expulsion indicators. Each starts at 0 and is set to 1 where the condition
# is TRUE; rows where the condition is NA stay 0.

# "at least 1 non-territorial expulsion in the period" (type code 1)
mydata_b$VrtrbLocal <- replace(
  numeric(nrow(mydata_b)),
  which(with(mydata_b, VrtrbType1 == 1 | VrtrbType2 == 1)),
  1
)

# "at least 1 non-territorial expulsion attempt in the period" (type code 9)
mydata_b$VrtrbAtt <- replace(
  numeric(nrow(mydata_b)),
  which(with(mydata_b, VrtrbType1 == 9 | VrtrbType2 == 9)),
  1
)

# "at least 1 territorial expulsion in the period" (type code 77)
mydata_b$VrtrbTerr <- replace(
  numeric(nrow(mydata_b)),
  which(with(mydata_b, VrtrbType1 == 77 | VrtrbType2 == 77)),
  1
)

# territorial expulsions with the French waves dropped (set to NA)
mydata_b$VrtrbTerrNoFr <- replace(
  mydata_b$VrtrbTerr,
  which(mydata_b$VrtrbFr == 1),
  NA
)

# "at least 1 expulsion of any kind in the period":
# VrtrbObs is neither 0 nor the code 88
mydata_b$VrtrbAny <- replace(
  numeric(nrow(mydata_b)),
  which(with(mydata_b, VrtrbObs != 0 & VrtrbObs != 88)),
  1
)

# Recode 88s to NA in every column except the ID columns.
# (A blanket mydata_b[mydata_b == 88] <- NA would also hit the IDs - yikes!)
# Written with base R instead of dplyr::mutate_at()/funs()/one_of(): funs()
# has been deprecated since dplyr 0.8.0. The per-column recode is unchanged
# (ifelse(x == 88, NA, x)).
IDs <- c("ObsID", "PlaceID")
mydata_b[setdiff(names(mydata_b), IDs)] <- lapply(
  mydata_b[setdiff(names(mydata_b), IDs)],
  function(x) ifelse(x == 88, NA, x)
)

# AuthT3: rough count of total authorities with the upper tail truncated.
# 1 and 2 are kept, 3 or more becomes 3, anything else is NA.
mydata_b$AuthT3 <- local({
  authorities <- mydata_b$AuthT2
  capped <- rep(NA, nrow(mydata_b))
  capped[which(authorities == 1)] <- 1
  capped[which(authorities == 2)] <- 2
  capped[which(authorities >= 3)] <- 3
  capped
})

# AuthChgT2: transitions collapsed to an indicator.
# 0 when AuthChgT is 0, 1 when it is above 0, NA otherwise.
mydata_b$AuthChgT2 <- local({
  transitions <- mydata_b$AuthChgT
  any_change <- rep(NA, nrow(mydata_b))
  any_change[which(transitions == 0)] <- 0
  any_change[which(transitions > 0)] <- 1
  any_change
})

# Jewish infrastructure
# J_dev_sum: sum of Syngg, Cmtry, JQrtr and Mikva
mydata_b$J_dev_sum <- with(mydata_b, Syngg + Cmtry + JQrtr + Mikva)
# J_dev_bin: product of the same four columns
# (1 only when all four are 1, assuming 0/1 coding - TODO confirm)
mydata_b$J_dev_bin <- with(mydata_b, Syngg * Cmtry * JQrtr * Mikva)

# Political infrastructure
# Office_sum: sum of Rat, Mayor, Schoeffen and Schlthss
mydata_b$Office_sum <- with(mydata_b, Rat + Mayor + Schoeffen + Schlthss)
# Office_bin: Office_sum with every value of 1 or more collapsed to 1
mydata_b$Office_bin <- replace(
  mydata_b$Office_sum,
  which(mydata_b$Office_sum >= 1),
  1
)

# Recode time-specific persecutions to exclude other time periods: each event
# column is set to NA in every row whose matching period dummy is not 1.
mydata_b <- local({
  # event column -> period dummy it belongs to
  event_period <- c(
    SwssSldr  = "Period1500",
    Crsde1096 = "Period1100",
    Crsde1147 = "Period1200",
    Crsde1309 = "Period1350",
    Rintflsch = "Period1300",
    GtrWerner = "Period1300",
    Armleder  = "Period1350",
    Pest      = "Period1350"
  )
  for (event in names(event_period)) {
    outside_period <- mydata_b[[event_period[[event]]]] != 1
    mydata_b[[event]][outside_period] <- NA
  }
  mydata_b
})

# Sector as a factor, for mapping sector dummies
mydata_b$Sector_f <- as.factor(mydata_b$Sector)
